set more off

*** 1870:
* Builds, for each commuting zone (cz), the total of n_ind50 in each of 12
* coarse industry groups, the within-CZ industry shares and the national
* industry shares, and saves them as the first decade of temp_ipums_ind.dta.

import delimited using dsets\ipums_data_industries\industry_1870_to_counties90.csv, clear

rename ind50 ind1950

* Keep only codes in 1..936.
keep if ind1950 > 0
keep if ind1950 <= 936

* Map detailed ind1950 codes to 12 coarse groups (0 = code in none of the ranges).
gen ind1950_coarse = 0
replace ind1950_coarse = 1 if ind1950 >= 105 & ind1950 <= 126
replace ind1950_coarse = 2 if ind1950 >= 206 & ind1950 <= 239
replace ind1950_coarse = 3 if ind1950 == 246
replace ind1950_coarse = 4 if ind1950 >= 306 & ind1950 <= 499
replace ind1950_coarse = 5 if ind1950 >= 506 & ind1950 <= 598
replace ind1950_coarse = 6 if ind1950 >= 606 & ind1950 <= 699
replace ind1950_coarse = 7 if ind1950 >= 716 & ind1950 <= 756
replace ind1950_coarse = 8 if ind1950 >= 806 & ind1950 <= 817
replace ind1950_coarse = 9 if ind1950 >= 826 & ind1950 <= 849
replace ind1950_coarse = 10 if ind1950 >= 856 & ind1950 <= 859
replace ind1950_coarse = 11 if ind1950 >= 868 & ind1950 <= 899
replace ind1950_coarse = 12 if ind1950 >= 906 & ind1950 <= 936

* Total n_ind50 within each CZ x coarse-industry cell, then keep one row per cell.
bys cz ind1950_coarse: egen cz_totwt_ind = sum(n_ind50)

duplicates drop cz ind1950_coarse, force
keep cz ind1950_coarse cz_totwt_ind

reshape wide cz_totwt_ind, i(cz) j(ind1950_coarse)

* reshape wide leaves a cell missing when a CZ has no rows for that industry,
* and creates no variable at all when no CZ has any. A missing cell would make
* cz_totwt (and hence every share of that CZ) missing below, so treat an empty
* cell as zero weight.
forvalues i=1/12 {
    capture confirm variable cz_totwt_ind`i', exact
    if _rc {
        gen cz_totwt_ind`i' = 0
    }
    replace cz_totwt_ind`i' = 0 if missing(cz_totwt_ind`i')
}

* Restrict to the CZs listed in cz_to_consider_cpp.dta.
merge 1:1 cz using dsets\cz_data\cz_to_consider_cpp.dta, keep(match) nogen

* CZ total across the 12 coarse industries.
gen cz_totwt = 0
forvalues i=1/12 {
    replace cz_totwt = cz_totwt + cz_totwt_ind`i'
}

* Within-CZ industry shares.
forvalues i=1/12 {
    gen cz_ind_share`i' = cz_totwt_ind`i' / cz_totwt
}

* National industry shares (same value on every row).
egen us_totwt = sum(cz_totwt)
forvalues i=1/12 {
    egen us_ind_share`i' = sum(cz_totwt_ind`i' / us_totwt)
}

keep cz cz_ind_share* us_ind_share* cz_totwt_ind*

gen decade = 1870
order decade, first

save dsets\temp_datasets_todelete\temp_ipums_ind.dta, replace

*** 1890 (interpolating 1880 and 1900):
* Builds CZ x coarse-industry totals for 1880 and 1900 separately, averages
* them to obtain the 1890 values, computes shares, and appends the result to
* temp_ipums_ind.dta.

* 1880:
import delimited using dsets\ipums_data_industries\industry_1880_to_counties90.csv, clear

rename ind50 ind1950

* Keep only codes in 1..936.
keep if ind1950 > 0
keep if ind1950 <= 936

* Map detailed ind1950 codes to 12 coarse groups (0 = code in none of the ranges).
gen ind1950_coarse = 0
replace ind1950_coarse = 1 if ind1950 >= 105 & ind1950 <= 126
replace ind1950_coarse = 2 if ind1950 >= 206 & ind1950 <= 239
replace ind1950_coarse = 3 if ind1950 == 246
replace ind1950_coarse = 4 if ind1950 >= 306 & ind1950 <= 499
replace ind1950_coarse = 5 if ind1950 >= 506 & ind1950 <= 598
replace ind1950_coarse = 6 if ind1950 >= 606 & ind1950 <= 699
replace ind1950_coarse = 7 if ind1950 >= 716 & ind1950 <= 756
replace ind1950_coarse = 8 if ind1950 >= 806 & ind1950 <= 817
replace ind1950_coarse = 9 if ind1950 >= 826 & ind1950 <= 849
replace ind1950_coarse = 10 if ind1950 >= 856 & ind1950 <= 859
replace ind1950_coarse = 11 if ind1950 >= 868 & ind1950 <= 899
replace ind1950_coarse = 12 if ind1950 >= 906 & ind1950 <= 936

bys cz ind1950_coarse: egen aux1_cz_totwt_ind = sum(n_ind50)

duplicates drop cz ind1950_coarse, force
keep cz ind1950_coarse aux1_cz_totwt_ind

reshape wide aux1_cz_totwt_ind, i(cz) j(ind1950_coarse)

* A CZ with no rows for an industry gets a missing cell from reshape wide (and
* the variable is absent if no CZ has any); treat such cells as zero weight so
* the interpolation and the CZ total below stay defined.
forvalues i=1/12 {
    capture confirm variable aux1_cz_totwt_ind`i', exact
    if _rc {
        gen aux1_cz_totwt_ind`i' = 0
    }
    replace aux1_cz_totwt_ind`i' = 0 if missing(aux1_cz_totwt_ind`i')
}

merge 1:1 cz using dsets\cz_data\cz_to_consider_cpp.dta, keep(match) nogen
save dsets\temp_datasets_todelete\temp300.dta, replace

* 1900:
import delimited using dsets\ipums_data_industries\industry_1900_to_counties90.csv, clear

rename ind50 ind1950

keep if ind1950 > 0
keep if ind1950 <= 936

gen ind1950_coarse = 0
replace ind1950_coarse = 1 if ind1950 >= 105 & ind1950 <= 126
replace ind1950_coarse = 2 if ind1950 >= 206 & ind1950 <= 239
replace ind1950_coarse = 3 if ind1950 == 246
replace ind1950_coarse = 4 if ind1950 >= 306 & ind1950 <= 499
replace ind1950_coarse = 5 if ind1950 >= 506 & ind1950 <= 598
replace ind1950_coarse = 6 if ind1950 >= 606 & ind1950 <= 699
replace ind1950_coarse = 7 if ind1950 >= 716 & ind1950 <= 756
replace ind1950_coarse = 8 if ind1950 >= 806 & ind1950 <= 817
replace ind1950_coarse = 9 if ind1950 >= 826 & ind1950 <= 849
replace ind1950_coarse = 10 if ind1950 >= 856 & ind1950 <= 859
replace ind1950_coarse = 11 if ind1950 >= 868 & ind1950 <= 899
replace ind1950_coarse = 12 if ind1950 >= 906 & ind1950 <= 936

bys cz ind1950_coarse: egen aux2_cz_totwt_ind = sum(n_ind50)

duplicates drop cz ind1950_coarse, force
keep cz ind1950_coarse aux2_cz_totwt_ind

reshape wide aux2_cz_totwt_ind, i(cz) j(ind1950_coarse)

* Same zero-fill as for 1880.
forvalues i=1/12 {
    capture confirm variable aux2_cz_totwt_ind`i', exact
    if _rc {
        gen aux2_cz_totwt_ind`i' = 0
    }
    replace aux2_cz_totwt_ind`i' = 0 if missing(aux2_cz_totwt_ind`i')
}

merge 1:1 cz using dsets\cz_data\cz_to_consider_cpp.dta, keep(match) nogen
* Bring in the 1880 totals; only CZs present in both years are kept.
merge 1:1 cz using dsets\temp_datasets_todelete\temp300.dta, keep(match) nogen

* Interpolate linearly (1890 is the midpoint, so this is the simple average):
forvalues i=1/12 {
    gen cz_totwt_ind`i' = (aux1_cz_totwt_ind`i' + aux2_cz_totwt_ind`i')/2
}

keep cz cz_totwt_ind*

* CZ total across the 12 coarse industries.
gen cz_totwt = 0
forvalues i=1/12 {
    replace cz_totwt = cz_totwt + cz_totwt_ind`i'
}

* Within-CZ industry shares.
forvalues i=1/12 {
    gen cz_ind_share`i' = cz_totwt_ind`i' / cz_totwt
}

* National industry shares (same value on every row).
egen us_totwt = sum(cz_totwt)
forvalues i=1/12 {
    egen us_ind_share`i' = sum(cz_totwt_ind`i' / us_totwt)
}

keep cz cz_ind_share* us_ind_share* cz_totwt_ind*

gen decade = 1890
order decade, first

* Stack on top of the decades built so far.
append using dsets\temp_datasets_todelete\temp_ipums_ind.dta
save dsets\temp_datasets_todelete\temp_ipums_ind.dta, replace

*** 1910:
* Builds CZ x coarse-industry totals and shares for 1910 and appends them to
* temp_ipums_ind.dta.

import delimited using dsets\ipums_data_industries\industry_1910_to_counties90.csv, clear

rename ind50 ind1950

* Keep only codes in 1..936.
keep if ind1950 > 0
keep if ind1950 <= 936

* Map detailed ind1950 codes to 12 coarse groups (0 = code in none of the ranges).
gen ind1950_coarse = 0
replace ind1950_coarse = 1 if ind1950 >= 105 & ind1950 <= 126
replace ind1950_coarse = 2 if ind1950 >= 206 & ind1950 <= 239
replace ind1950_coarse = 3 if ind1950 == 246
replace ind1950_coarse = 4 if ind1950 >= 306 & ind1950 <= 499
replace ind1950_coarse = 5 if ind1950 >= 506 & ind1950 <= 598
replace ind1950_coarse = 6 if ind1950 >= 606 & ind1950 <= 699
replace ind1950_coarse = 7 if ind1950 >= 716 & ind1950 <= 756
replace ind1950_coarse = 8 if ind1950 >= 806 & ind1950 <= 817
replace ind1950_coarse = 9 if ind1950 >= 826 & ind1950 <= 849
replace ind1950_coarse = 10 if ind1950 >= 856 & ind1950 <= 859
replace ind1950_coarse = 11 if ind1950 >= 868 & ind1950 <= 899
replace ind1950_coarse = 12 if ind1950 >= 906 & ind1950 <= 936

* Total n_ind50 within each CZ x coarse-industry cell, then keep one row per cell.
bys cz ind1950_coarse: egen cz_totwt_ind = sum(n_ind50)

duplicates drop cz ind1950_coarse, force
keep cz ind1950_coarse cz_totwt_ind

reshape wide cz_totwt_ind, i(cz) j(ind1950_coarse)

* reshape wide leaves a cell missing when a CZ has no rows for that industry,
* and creates no variable at all when no CZ has any. A missing cell would make
* cz_totwt (and hence every share of that CZ) missing below, so treat an empty
* cell as zero weight.
forvalues i=1/12 {
    capture confirm variable cz_totwt_ind`i', exact
    if _rc {
        gen cz_totwt_ind`i' = 0
    }
    replace cz_totwt_ind`i' = 0 if missing(cz_totwt_ind`i')
}

* Restrict to the CZs listed in cz_to_consider_cpp.dta.
merge 1:1 cz using dsets\cz_data\cz_to_consider_cpp.dta, keep(match) nogen

* CZ total across the 12 coarse industries.
gen cz_totwt = 0
forvalues i=1/12 {
    replace cz_totwt = cz_totwt + cz_totwt_ind`i'
}

* Within-CZ industry shares.
forvalues i=1/12 {
    gen cz_ind_share`i' = cz_totwt_ind`i' / cz_totwt
}

* National industry shares (same value on every row).
egen us_totwt = sum(cz_totwt)
forvalues i=1/12 {
    egen us_ind_share`i' = sum(cz_totwt_ind`i' / us_totwt)
}

keep cz cz_ind_share* us_ind_share* cz_totwt_ind*

gen decade = 1910
order decade, first

* Stack on top of the decades built so far.
append using dsets\temp_datasets_todelete\temp_ipums_ind.dta
save dsets\temp_datasets_todelete\temp_ipums_ind.dta, replace

*** 1930:
* Builds CZ x coarse-industry totals and shares for 1930 and appends them to
* temp_ipums_ind.dta.

import delimited using dsets\ipums_data_industries\industry_1930_to_counties90.csv, clear

rename ind50 ind1950

* Keep only codes in 1..936.
keep if ind1950 > 0
keep if ind1950 <= 936

* Map detailed ind1950 codes to 12 coarse groups (0 = code in none of the ranges).
gen ind1950_coarse = 0
replace ind1950_coarse = 1 if ind1950 >= 105 & ind1950 <= 126
replace ind1950_coarse = 2 if ind1950 >= 206 & ind1950 <= 239
replace ind1950_coarse = 3 if ind1950 == 246
replace ind1950_coarse = 4 if ind1950 >= 306 & ind1950 <= 499
replace ind1950_coarse = 5 if ind1950 >= 506 & ind1950 <= 598
replace ind1950_coarse = 6 if ind1950 >= 606 & ind1950 <= 699
replace ind1950_coarse = 7 if ind1950 >= 716 & ind1950 <= 756
replace ind1950_coarse = 8 if ind1950 >= 806 & ind1950 <= 817
replace ind1950_coarse = 9 if ind1950 >= 826 & ind1950 <= 849
replace ind1950_coarse = 10 if ind1950 >= 856 & ind1950 <= 859
replace ind1950_coarse = 11 if ind1950 >= 868 & ind1950 <= 899
replace ind1950_coarse = 12 if ind1950 >= 906 & ind1950 <= 936

* Total n_ind50 within each CZ x coarse-industry cell, then keep one row per cell.
bys cz ind1950_coarse: egen cz_totwt_ind = sum(n_ind50)

duplicates drop cz ind1950_coarse, force
keep cz ind1950_coarse cz_totwt_ind

reshape wide cz_totwt_ind, i(cz) j(ind1950_coarse)

* reshape wide leaves a cell missing when a CZ has no rows for that industry,
* and creates no variable at all when no CZ has any. A missing cell would make
* cz_totwt (and hence every share of that CZ) missing below, so treat an empty
* cell as zero weight.
forvalues i=1/12 {
    capture confirm variable cz_totwt_ind`i', exact
    if _rc {
        gen cz_totwt_ind`i' = 0
    }
    replace cz_totwt_ind`i' = 0 if missing(cz_totwt_ind`i')
}

* Restrict to the CZs listed in cz_to_consider_cpp.dta.
merge 1:1 cz using dsets\cz_data\cz_to_consider_cpp.dta, keep(match) nogen

* CZ total across the 12 coarse industries.
gen cz_totwt = 0
forvalues i=1/12 {
    replace cz_totwt = cz_totwt + cz_totwt_ind`i'
}

* Within-CZ industry shares.
forvalues i=1/12 {
    gen cz_ind_share`i' = cz_totwt_ind`i' / cz_totwt
}

* National industry shares (same value on every row).
egen us_totwt = sum(cz_totwt)
forvalues i=1/12 {
    egen us_ind_share`i' = sum(cz_totwt_ind`i' / us_totwt)
}

keep cz cz_ind_share* us_ind_share* cz_totwt_ind*

gen decade = 1930
order decade, first

* Stack on top of the decades built so far.
append using dsets\temp_datasets_todelete\temp_ipums_ind.dta
save dsets\temp_datasets_todelete\temp_ipums_ind.dta, replace

*** 1950:
* Reads the fixed-width extract usa_00036.dat, totals perwt by sea and coarse
* industry, apportions those totals to CZs through the sea-to-cz crosswalk,
* computes shares, and appends the result to temp_ipums_ind.dta.

clear
quietly infix             ///
  int     year     1-4    ///
  long    sample   5-10   ///
  double  serial   11-18  ///
  double  hhwt     19-28  ///
  double  cluster  29-41  ///
  double  strata   42-53  ///
  int     sea      54-56  ///
  byte    gq       57-57  ///
  int     pernum   58-61  ///
  double  perwt    62-71  ///
  int     ind1950  72-74  ///
  using `"dsets\ipums_data_industries\usa_00036.dat"'

* Rescale both weight variables by 1/100.
foreach w of varlist hhwt perwt {
    replace `w' = `w' / 100
}

format serial     %8.0g
format hhwt       %10.2f
format cluster    %13.0g
format strata     %12.0g
format perwt      %10.2f

label variable year    "Census year"
label variable sample  "IPUMS sample identifier"
label variable serial  "Household serial number"
label variable hhwt    "Household weight"
label variable cluster "Household cluster for variance estimation"
label variable strata  "Household strata for variance estimation"
label variable sea     "State Economic Area"
label variable gq      "Group quarters status"
label variable pernum  "Person number in sample unit"
label variable perwt   "Person weight"
label variable ind1950 "Industry, 1950 basis"

* Only the geography, the person weight and the industry code are needed.
keep sea perwt ind1950
keep if ind1950 > 0 & ind1950 <= 936

* Coarse industry group g covers codes from the g-th entry of lower_codes to
* the g-th entry of upper_codes; codes outside every range stay at 0.
local lower_codes 105 206 246 306 506 606 716 806 826 856 868 906
local upper_codes 126 239 246 499 598 699 756 817 849 859 899 936
gen ind1950_coarse = 0
forvalues g = 1/12 {
    local from : word `g' of `lower_codes'
    local to   : word `g' of `upper_codes'
    replace ind1950_coarse = `g' if inrange(ind1950, `from', `to')
}

drop ind1950

* One row per sea x coarse industry holding the summed person weight.
bysort sea ind1950_coarse: egen totwt_ind = total(perwt)
drop perwt
duplicates drop
reshape wide totwt_ind, i(sea) j(ind1950_coarse)

save dsets\temp_datasets_todelete\temp200.dta, replace

* Crosswalk from sea to cz; each row carries intersection_perc.
import delimited using dsets\geo_crosswalks\cw_sea50_cz90.csv, clear
rename (start_id target_id) (sea cz)

merge m:1 sea using dsets\temp_datasets_todelete\temp200.dta, keep(match) nogenerate

* CZ totals: crosswalk-weighted sum of the sea totals.
forvalues g = 1/12 {
    bysort cz: egen cz_totwt_ind`g' = total(totwt_ind`g'*intersection_perc)
}

keep cz cz_totwt_ind*
duplicates drop

* Restrict to the CZs listed in cz_to_consider_cpp.dta.
merge 1:1 cz using dsets\cz_data\cz_to_consider_cpp.dta, keep(match) nogenerate

* CZ total across the 12 coarse industries.
gen cz_totwt = 0
forvalues g = 1/12 {
    replace cz_totwt = cz_totwt + cz_totwt_ind`g'
}

* Within-CZ industry shares.
forvalues g = 1/12 {
    gen cz_ind_share`g' = cz_totwt_ind`g' / cz_totwt
}

* National industry shares (same value on every row).
egen us_totwt = total(cz_totwt)
forvalues g = 1/12 {
    egen us_ind_share`g' = total(cz_totwt_ind`g' / us_totwt)
}

keep cz cz_ind_share* us_ind_share* cz_totwt_ind*

gen decade = 1950
order decade

* Stack on top of the decades built so far.
append using dsets\temp_datasets_todelete\temp_ipums_ind.dta
save dsets\temp_datasets_todelete\temp_ipums_ind.dta, replace

*** 1970:
* Reads the fixed-width extract usa_00037.dat, totals perwt by cntygp97 and
* coarse industry, apportions those totals to CZs through the
* county-group-to-cz crosswalk, computes shares, and appends the result to
* temp_ipums_ind.dta.

clear
quietly infix              ///
  int     year      1-4    ///
  long    sample    5-10   ///
  double  serial    11-18  ///
  double  hhwt      19-28  ///
  double  cluster   29-41  ///
  long    cntygp97  42-46  ///
  double  strata    47-58  ///
  byte    gq        59-59  ///
  int     pernum    60-63  ///
  double  perwt     64-73  ///
  int     ind1950   74-76  ///
  using `"dsets\ipums_data_industries\usa_00037.dat"'

* Rescale both weight variables by 1/100.
foreach w of varlist hhwt perwt {
    replace `w' = `w' / 100
}

format serial     %8.0g
format hhwt       %10.2f
format cluster    %13.0g
format strata     %12.0g
format perwt      %10.2f

label variable year     "Census year"
label variable sample   "IPUMS sample identifier"
label variable serial   "Household serial number"
label variable hhwt     "Household weight"
label variable cluster  "Household cluster for variance estimation"
label variable cntygp97 "County group, 1970"
label variable strata   "Household strata for variance estimation"
label variable gq       "Group quarters status"
label variable pernum   "Person number in sample unit"
label variable perwt    "Person weight"
label variable ind1950  "Industry, 1950 basis"

* Only the geography, the person weight and the industry code are needed.
keep cntygp97 perwt ind1950
keep if ind1950 > 0 & ind1950 <= 936

* Coarse industry group g covers codes from the g-th entry of lower_codes to
* the g-th entry of upper_codes; codes outside every range stay at 0.
local lower_codes 105 206 246 306 506 606 716 806 826 856 868 906
local upper_codes 126 239 246 499 598 699 756 817 849 859 899 936
gen ind1950_coarse = 0
forvalues g = 1/12 {
    local from : word `g' of `lower_codes'
    local to   : word `g' of `upper_codes'
    replace ind1950_coarse = `g' if inrange(ind1950, `from', `to')
}

drop ind1950

* One row per county group x coarse industry holding the summed person weight.
bysort cntygp97 ind1950_coarse: egen totwt_ind = total(perwt)
drop perwt
duplicates drop
reshape wide totwt_ind, i(cntygp97) j(ind1950_coarse)

save dsets\temp_datasets_todelete\temp200.dta, replace

* Crosswalk from cntygp97 to cz; each row carries intersection_perc.
import delimited using dsets\geo_crosswalks\cw_ctygp70_cz90.csv, clear
rename (start_id target_id) (cntygp97 cz)

merge m:1 cntygp97 using dsets\temp_datasets_todelete\temp200.dta, keep(match) nogenerate

* CZ totals: crosswalk-weighted sum of the county-group totals.
forvalues g = 1/12 {
    bysort cz: egen cz_totwt_ind`g' = total(totwt_ind`g'*intersection_perc)
}

keep cz cz_totwt_ind*
duplicates drop

* Restrict to the CZs listed in cz_to_consider_cpp.dta.
merge 1:1 cz using dsets\cz_data\cz_to_consider_cpp.dta, keep(match) nogenerate

* CZ total across the 12 coarse industries.
gen cz_totwt = 0
forvalues g = 1/12 {
    replace cz_totwt = cz_totwt + cz_totwt_ind`g'
}

* Within-CZ industry shares.
forvalues g = 1/12 {
    gen cz_ind_share`g' = cz_totwt_ind`g' / cz_totwt
}

* National industry shares (same value on every row).
egen us_totwt = total(cz_totwt)
forvalues g = 1/12 {
    egen us_ind_share`g' = total(cz_totwt_ind`g' / us_totwt)
}

keep cz cz_ind_share* us_ind_share* cz_totwt_ind*

gen decade = 1970
order decade

* Stack on top of the decades built so far.
append using dsets\temp_datasets_todelete\temp_ipums_ind.dta
save dsets\temp_datasets_todelete\temp_ipums_ind.dta, replace

*** 1990:
* Reads the fixed-width extract usa_00038.dat, totals perwt by (statefip, puma)
* and coarse industry, apportions those totals to CZs through the puma-to-cz
* crosswalk, computes shares, and appends the result to temp_ipums_ind.dta.

clear
quietly infix              ///
  int     year      1-4    ///
  long    sample    5-10   ///
  double  serial    11-18  ///
  double  hhwt      19-28  ///
  double  cluster   29-41  ///
  byte    statefip  42-43  ///
  long    puma      44-48  ///
  double  strata    49-60  ///
  byte    gq        61-61  ///
  int     pernum    62-65  ///
  double  perwt     66-75  ///
  int     ind1950   76-78  ///
  using `"dsets\ipums_data_industries\usa_00038.dat"'

* Rescale both weight variables by 1/100.
foreach w of varlist hhwt perwt {
    replace `w' = `w' / 100
}

format serial     %8.0g
format hhwt       %10.2f
format cluster    %13.0g
format strata     %12.0g
format perwt      %10.2f

label variable year     "Census year"
label variable sample   "IPUMS sample identifier"
label variable serial   "Household serial number"
label variable hhwt     "Household weight"
label variable cluster  "Household cluster for variance estimation"
label variable statefip "State (FIPS code)"
label variable puma     "Public Use Microdata Area"
label variable strata   "Household strata for variance estimation"
label variable gq       "Group quarters status"
label variable pernum   "Person number in sample unit"
label variable perwt    "Person weight"
label variable ind1950  "Industry, 1950 basis"

* Only the geography, the person weight and the industry code are needed.
keep puma statefip perwt ind1950
keep if ind1950 > 0 & ind1950 <= 936

* Coarse industry group g covers codes from the g-th entry of lower_codes to
* the g-th entry of upper_codes; codes outside every range stay at 0.
local lower_codes 105 206 246 306 506 606 716 806 826 856 868 906
local upper_codes 126 239 246 499 598 699 756 817 849 859 899 936
gen ind1950_coarse = 0
forvalues g = 1/12 {
    local from : word `g' of `lower_codes'
    local to   : word `g' of `upper_codes'
    replace ind1950_coarse = `g' if inrange(ind1950, `from', `to')
}

drop ind1950

* One row per (puma, statefip) x coarse industry holding the summed person weight.
bysort puma statefip ind1950_coarse: egen totwt_ind = total(perwt)
drop perwt
duplicates drop
reshape wide totwt_ind, i(puma statefip) j(ind1950_coarse)

save dsets\temp_datasets_todelete\temp200.dta, replace

* Crosswalk to cz. start_id is split into statefip and puma: for a 6- or
* 7-character id the state is everything before the last five characters and
* the puma is the last four characters (the character in between is skipped).
* Ids of any other length yield missing statefip and puma.
import delimited using dsets\geo_crosswalks\cw_puma90_cz90.csv, clear
rename target_id cz
tostring start_id, gen(start_id_string)
gen len_start_id_string = strlen(start_id_string)
gen statefip_string = substr(start_id_string, 1, len_start_id_string - 5) if inlist(len_start_id_string, 6, 7)
gen puma_string = substr(start_id_string, len_start_id_string - 3, 4) if inlist(len_start_id_string, 6, 7)
destring statefip_string, gen(statefip)
destring puma_string, gen(puma)
drop start_id start_id_string len_start_id_string statefip_string puma_string

merge m:1 puma statefip using dsets\temp_datasets_todelete\temp200.dta, keep(match) nogenerate

* CZ totals: crosswalk-weighted sum of the puma totals.
forvalues g = 1/12 {
    bysort cz: egen cz_totwt_ind`g' = total(totwt_ind`g'*intersection_perc)
}

keep cz cz_totwt_ind*
duplicates drop

* Restrict to the CZs listed in cz_to_consider_cpp.dta.
merge 1:1 cz using dsets\cz_data\cz_to_consider_cpp.dta, keep(match) nogenerate

* CZ total across the 12 coarse industries.
gen cz_totwt = 0
forvalues g = 1/12 {
    replace cz_totwt = cz_totwt + cz_totwt_ind`g'
}

* Within-CZ industry shares.
forvalues g = 1/12 {
    gen cz_ind_share`g' = cz_totwt_ind`g' / cz_totwt
}

* National industry shares (same value on every row).
egen us_totwt = total(cz_totwt)
forvalues g = 1/12 {
    egen us_ind_share`g' = total(cz_totwt_ind`g' / us_totwt)
}

keep cz cz_ind_share* us_ind_share* cz_totwt_ind*

gen decade = 1990
order decade

* Stack on top of the decades built so far.
append using dsets\temp_datasets_todelete\temp_ipums_ind.dta
save dsets\temp_datasets_todelete\temp_ipums_ind.dta, replace

*** 2010:
* Reads the fixed-width extract usa_00039.dat, totals perwt by (statefip, puma)
* and coarse industry, apportions those totals to CZs through the puma-to-cz
* crosswalk, computes shares, and appends the result to temp_ipums_ind.dta.

clear
quietly infix              ///
  int     year      1-4    ///
  int     multyear  5-8    ///
  long    sample    9-14   ///
  double  serial    15-22  ///
  double  cbserial  23-35  ///
  double  hhwt      36-45  ///
  double  cluster   46-58  ///
  byte    statefip  59-60  ///
  long    puma      61-65  ///
  double  strata    66-77  ///
  byte    gq        78-78  ///
  int     pernum    79-82  ///
  double  perwt     83-92  ///
  int     ind1950   93-95  ///
  using `"dsets\ipums_data_industries\usa_00039.dat"'

* Rescale both weight variables by 1/100.
foreach w of varlist hhwt perwt {
    replace `w' = `w' / 100
}

format serial     %8.0g
format cbserial   %13.0g
format hhwt       %10.2f
format cluster    %13.0g
format strata     %12.0g
format perwt      %10.2f

label variable year     "Census year"
label variable multyear "Actual year of survey, multi-year ACS/PRCS"
label variable sample   "IPUMS sample identifier"
label variable serial   "Household serial number"
label variable cbserial "Original Census Bureau household serial number"
label variable hhwt     "Household weight"
label variable cluster  "Household cluster for variance estimation"
label variable statefip "State (FIPS code)"
label variable puma     "Public Use Microdata Area"
label variable strata   "Household strata for variance estimation"
label variable gq       "Group quarters status"
label variable pernum   "Person number in sample unit"
label variable perwt    "Person weight"
label variable ind1950  "Industry, 1950 basis"

* Only the geography, the person weight and the industry code are needed.
keep puma statefip perwt ind1950
keep if ind1950 > 0 & ind1950 <= 936

* Coarse industry group g covers codes from the g-th entry of lower_codes to
* the g-th entry of upper_codes; codes outside every range stay at 0.
local lower_codes 105 206 246 306 506 606 716 806 826 856 868 906
local upper_codes 126 239 246 499 598 699 756 817 849 859 899 936
gen ind1950_coarse = 0
forvalues g = 1/12 {
    local from : word `g' of `lower_codes'
    local to   : word `g' of `upper_codes'
    replace ind1950_coarse = `g' if inrange(ind1950, `from', `to')
}

drop ind1950

* One row per (puma, statefip) x coarse industry holding the summed person weight.
bysort puma statefip ind1950_coarse: egen totwt_ind = total(perwt)
drop perwt
duplicates drop
reshape wide totwt_ind, i(puma statefip) j(ind1950_coarse)

save dsets\temp_datasets_todelete\temp200.dta, replace

* Crosswalk to cz. start_id is split into statefip and puma: for a 6- or
* 7-character id the state is everything before the last five characters and
* the puma is the last four characters (the character in between is skipped).
* Ids of any other length yield missing statefip and puma.
import delimited using dsets\geo_crosswalks\cw_puma00_cz90.csv, clear
rename target_id cz
tostring start_id, gen(start_id_string)
gen len_start_id_string = strlen(start_id_string)
gen statefip_string = substr(start_id_string, 1, len_start_id_string - 5) if inlist(len_start_id_string, 6, 7)
gen puma_string = substr(start_id_string, len_start_id_string - 3, 4) if inlist(len_start_id_string, 6, 7)
destring statefip_string, gen(statefip)
destring puma_string, gen(puma)
drop start_id start_id_string len_start_id_string statefip_string puma_string

merge m:1 puma statefip using dsets\temp_datasets_todelete\temp200.dta, keep(match) nogenerate

* CZ totals: crosswalk-weighted sum of the puma totals.
forvalues g = 1/12 {
    bysort cz: egen cz_totwt_ind`g' = total(totwt_ind`g'*intersection_perc)
}

keep cz cz_totwt_ind*
duplicates drop

* Restrict to the CZs listed in cz_to_consider_cpp.dta.
merge 1:1 cz using dsets\cz_data\cz_to_consider_cpp.dta, keep(match) nogenerate

* CZ total across the 12 coarse industries.
gen cz_totwt = 0
forvalues g = 1/12 {
    replace cz_totwt = cz_totwt + cz_totwt_ind`g'
}

* Within-CZ industry shares.
forvalues g = 1/12 {
    gen cz_ind_share`g' = cz_totwt_ind`g' / cz_totwt
}

* National industry shares (same value on every row).
egen us_totwt = total(cz_totwt)
forvalues g = 1/12 {
    egen us_ind_share`g' = total(cz_totwt_ind`g' / us_totwt)
}

keep cz cz_ind_share* us_ind_share* cz_totwt_ind*

gen decade = 2010
order decade

* Stack on top of the decades built so far.
append using dsets\temp_datasets_todelete\temp_ipums_ind.dta
save dsets\temp_datasets_todelete\temp_ipums_ind.dta, replace

*** Now, construct the shocks:
* Steps:
*   1. Reshape the stacked decade file to one row per cz_id x decade x industry.
*   2. For every cz_id, recompute the national industry shares with that CZ
*      left out and take the 20-year log change as the industry shock.
*   3. Interact each CZ's industry shares from 20 years earlier with those
*      shocks and sum over industries to obtain the local shock.

use dsets\temp_datasets_todelete\temp_ipums_ind.dta, clear

merge m:1 cz using dsets\cz_data\cz_id_cpp.dta, nogenerate

keep decade cz cz_id cz_totwt_ind* cz_ind_share*

reshape long cz_totwt_ind cz_ind_share, i(cz_id decade) j(industry_id)

save dsets\temp_datasets_todelete\temp40.dta, replace

* Also store the shares in wide form, one row per cz x decade.
keep cz decade industry_id cz_ind_share
reshape wide cz_ind_share, i(cz decade) j(industry_id)
save dsets\temp_datasets_todelete\cz_decade_industry_shares_wide_cpp.dta, replace

* The loop runs over cz_id = 1 .. max(cz_id).
use dsets\temp_datasets_todelete\temp40.dta, clear
summarize cz_id
local last_cz_id = r(max)

forvalues left_out = 1/`last_cz_id' {

    display `left_out'

    use dsets\temp_datasets_todelete\temp40.dta, clear

    * Leave the current CZ out of the national aggregates.
    drop if cz_id == `left_out'

    bysort decade: egen totwt_us_decade = total(cz_totwt_ind)
    bysort decade industry_id: egen totwt_ind_us = total(cz_totwt_ind)

    gen share_ind_us = totwt_ind_us / totwt_us_decade

    * One row per decade x industry.
    keep decade industry_id share_ind_us
    duplicates drop

    * Panel of industries observed every 20 years, so L. is the previous decade.
    xtset industry_id decade, delta(20)

    gen industry_shock = log(share_ind_us) - log(L.share_ind_us)

    drop if decade < 1870
    keep decade industry_id industry_shock

    gen cz_id = `left_out'

    * Accumulate the per-CZ results in temp20.dta (the first pass creates the file).
    if `left_out' != 1 {
        append using dsets\temp_datasets_todelete\temp20.dta
    }
    save dsets\temp_datasets_todelete\temp20.dta, replace
}

* Skeleton with one row per cz x industry (1..12) x decade (1870, 1890, ..., 2010).
use dsets\cz_data\cz_to_consider_cpp.dta, clear
expand 12
bysort cz: gen industry_id = _n
expand 8
bysort cz industry_id: gen decade = 1850 + 20*_n
sort decade cz industry_id
merge m:1 cz using dsets\cz_data\cz_id_cpp.dta, nogenerate

* First, attach the industry shares:
merge 1:1 cz_id decade industry_id using dsets\temp_datasets_todelete\temp40.dta, keep(match) nogenerate
keep cz industry_id decade cz_id cz_ind_share
* Move the shares forward by 20 years: the shares of, say, 1990 must interact with the shock of 2010.
replace decade = decade + 20
* Now, attach the shocks:
merge 1:1 cz_id decade industry_id using dsets\temp_datasets_todelete\temp20.dta, keep(match) nogenerate
* Finally, the local shock is the share-weighted sum of industry shocks:
bysort cz decade: egen local_industry_shock = total(cz_ind_share*industry_shock)

keep cz decade local_industry_shock

duplicates drop

save dsets\cz_data\industry_shocks_cpp.dta, replace

